Rem
Rem $Header: rdbms/demo/dmtxtnmf.sql /main/7 2012/04/15 16:31:57 xbarr Exp $
Rem
Rem dmtxtnmf.sql
Rem
Rem Copyright (c) 2003, 2012, Oracle and/or its affiliates. 
Rem All rights reserved. 
Rem
Rem    NAME
Rem      dmtxtnmf.sql - Sample program for the DBMS_DATA_MINING package.
Rem
Rem    DESCRIPTION
Rem      This script creates a text mining model
Rem      using non-negative matrix factorization. 
Rem
Rem    NOTES
Rem     
Rem
Rem    MODIFIED   (MM/DD/YY) 
Rem    amozes      01/23/12 - updates for 12c
Rem    ramkrish    10/25/07 - replace deprecated get_model calls with catalog
Rem                           queries
Rem    ktaylor     07/12/05 - minor edits to comments
Rem    ramkrish    10/28/04 - cleanup/comments
Rem    amozes      07/30/04 - format coefficient 
Rem    xbarr       06/25/04 - xbarr_dm_rdbms_migration
Rem    cbhagwat    10/17/03 - feature_extraction
Rem    cbhagwat    10/13/03 - cbhagwat_txn109175
Rem    cbhagwat    10/10/03 - fix
Rem    cbhagwat    10/08/03 - Creation
  
-- SQL*Plus session settings used for the rest of this script:
--   SERVEROUTPUT ON : display DBMS_OUTPUT text written by PL/SQL blocks
--   TRIMSPOOL ON    : remove trailing blanks from spooled lines
--   PAGESIZE 10000  : 10000 lines per output page
--   ECHO ON         : list each command of the script as it is executed
SET SERVEROUTPUT ON
SET TRIMSPOOL ON
SET PAGESIZE 10000
SET ECHO ON

-----------------------------------------------------------------------
--                            SAMPLE PROBLEM
-----------------------------------------------------------------------
-- Mine text features using the NMF algorithm.

-----------------------------------------------------------------------
--                            SET UP AND ANALYZE THE DATA
-----------------------------------------------------------------------
-- Create a policy for text feature extraction
-- The policy will include stemming
-- Remove the text policy from a previous run of this script.
-- Every error is deliberately ignored so the script can be re-run.
BEGIN
  CTX_DDL.DROP_POLICY('dmdemo_nmf_policy');
EXCEPTION
  WHEN OTHERS THEN
    NULL;
END;
/
-- Remove the lexer preference from a previous run, again ignoring errors.
BEGIN
  CTX_DDL.DROP_PREFERENCE('dmdemo_nmf_lexer');
EXCEPTION
  WHEN OTHERS THEN
    NULL;
END;
/
-- Define a BASIC_LEXER preference with the index_stems attribute
-- set to ENGLISH.
BEGIN
  CTX_DDL.CREATE_PREFERENCE('dmdemo_nmf_lexer', 'BASIC_LEXER');
  CTX_DDL.SET_ATTRIBUTE('dmdemo_nmf_lexer', 'index_stems', 'ENGLISH');
  -- Disabled alternative kept for reference (theme indexing):
  --   CTX_DDL.SET_ATTRIBUTE('dmdemo_nmf_lexer', 'index_themes', 'YES')
END;
/
-- Create the text policy and attach the lexer preference to it.
BEGIN
  CTX_DDL.CREATE_POLICY('dmdemo_nmf_policy', lexer => 'dmdemo_nmf_lexer');
END;
/

-----------------------------------------------------------------------
--                            BUILD THE MODEL
-----------------------------------------------------------------------

-- Cleanup old model and objects for repeat runs
-- Drop the model built by an earlier run. Every error is
-- intentionally suppressed so the script can be executed repeatedly.
BEGIN
  DBMS_DATA_MINING.DROP_MODEL(model_name => 'T_NMF_Sample');
EXCEPTION
  WHEN OTHERS THEN
    NULL;
END;
/
-- Drop the settings table from an earlier run so it can be re-created.
-- Only ORA-00942 (table or view does not exist), which is expected on a
-- first run, is ignored. Any other failure propagates and is reported
-- by SQL*Plus instead of being silently hidden.
DECLARE
  e_table_missing EXCEPTION;
  PRAGMA EXCEPTION_INIT(e_table_missing, -942);
BEGIN
  EXECUTE IMMEDIATE 'DROP TABLE t_nmf_sample_settings';
EXCEPTION
  WHEN e_table_missing THEN
    NULL;
END;
/

-- Create settings table to choose text policy and auto data prep
CREATE TABLE t_nmf_sample_settings (
  setting_name  VARCHAR2(30),
  setting_value VARCHAR2(4000));

-- Populate the settings table that is later passed to CREATE_MODEL.
-- Each INSERT names its target columns explicitly so the statements do
-- not depend on the physical column order of the table.
BEGIN
  -- Turn on automatic data preparation
  INSERT INTO t_nmf_sample_settings (setting_name, setting_value)
  VALUES (dbms_data_mining.prep_auto, dbms_data_mining.prep_auto_on);

  -- Name the Oracle Text policy to use for text attributes
  INSERT INTO t_nmf_sample_settings (setting_name, setting_value)
  VALUES (dbms_data_mining.odms_text_policy_name, 'DMDEMO_NMF_POLICY');

  -- Further NMF settings that could be inserted the same way
  -- (left disabled in this demo):
  --   (dbms_data_mining.nmfs_conv_tolerance, 0.05)
  --   (dbms_data_mining.nmfs_num_iterations, 50)
  --   (dbms_data_mining.nmfs_random_seed, -1)
  --   (dbms_data_mining.nmfs_stop_criteria,
  --    dbms_data_mining.nmfs_sc_iter_or_conv)
  COMMIT;
END;
/

---------------------
-- CREATE A NEW MODEL
-- Note: the transform causes the 'comments' attribute
-- to be treated as unstructured text data
--
DECLARE
  -- Transformation list handed to CREATE_MODEL; it carries the text
  -- attribute specification for the 'comments' column.
  v_transforms dbms_data_mining_transform.TRANSFORM_LIST;
BEGIN
  -- Register 'comments' with the expression 'comments' (the column itself)
  -- and a TEXT attribute specification using stem tokens.
  -- Disabled alternative for the specification: 'TEXT(TOKEN_TYPE:THEME)'
  dbms_data_mining_transform.SET_TRANSFORM(
    xform_list         => v_transforms,
    attribute_name     => 'comments',
    attribute_subname  => NULL,
    expression         => 'comments',
    reverse_expression => NULL,
    attribute_spec     => 'TEXT(TOKEN_TYPE:STEM)');

  -- Build the feature extraction model from mining_build_text using the
  -- settings table and the transformation list prepared above.
  DBMS_DATA_MINING.CREATE_MODEL(
    model_name          => 'T_NMF_Sample',
    mining_function     => dbms_data_mining.feature_extraction,
    data_table_name     => 'mining_build_text',
    case_id_column_name => 'cust_id',
    settings_table_name => 't_nmf_sample_settings',
    xform_list          => v_transforms);
END;
/
    
-------------------------
-- DISPLAY MODEL SETTINGS
--
-- Limit both displayed columns to 30 characters, then list the settings
-- recorded in the catalog for the model (queried by its upper-case name).
COLUMN setting_name  FORMAT a30
COLUMN setting_value FORMAT a30
SELECT s.setting_name,
       s.setting_value
  FROM user_mining_model_settings s
 WHERE s.model_name = 'T_NMF_SAMPLE'
 ORDER BY s.setting_name;

--------------------------
-- DISPLAY MODEL SIGNATURE
--
-- Set display widths, then list the attributes recorded in the catalog
-- for the model (queried by its upper-case name).
COLUMN attribute_name FORMAT a40
COLUMN attribute_type FORMAT a20
SELECT m.attribute_name,
       m.attribute_type
  FROM user_mining_model_attributes m
 WHERE m.model_name = 'T_NMF_SAMPLE'
 ORDER BY m.attribute_name;

------------------------
-- DISPLAY MODEL DETAILS
--
-- Report formatting for the feature listing: column widths, page and
-- line size, and a break on feature_id.
COLUMN attribute_name  FORMAT a30
COLUMN attribute_value FORMAT a20
COLUMN coefficient     FORMAT 9.99999
SET PAGESIZE 15
SET LINESIZE 120
BREAK ON feature_id

-- List the attributes and coefficients of the features with id below 3.
-- Each feature row is unnested into one row per attribute; a subname,
-- when present, is appended to the attribute name with a dot.
-- Rows are ordered by all four displayed columns.
WITH feature_terms AS (
  SELECT f.feature_id,
         CASE
           WHEN c.attribute_subname IS NOT NULL
             THEN c.attribute_name || '.' || c.attribute_subname
           ELSE c.attribute_name
         END AS attribute_name,
         c.attribute_value,
         c.coefficient
    FROM TABLE(dbms_data_mining.get_model_details_nmf('T_NMF_Sample')) f,
         TABLE(f.attribute_set) c
   WHERE f.feature_id < 3
)
SELECT feature_id,
       attribute_name,
       attribute_value,
       coefficient
  FROM feature_terms
 ORDER BY feature_id,
          attribute_name,
          attribute_value,
          coefficient;

-----------------------------------------------------------------------
--                               APPLY THE MODEL
-----------------------------------------------------------------------
-- See dmnmdemo.sql for examples.
